{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e7ebbf27",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1获取数据\n",
    "import pandas as pd\n",
    "\n",
    "column_names = [\"Class\",\"age\",\"menopause\",\"tumor-size\",\"inv-nodes\",\"node-caps\",\"deg-malig\",\"breast\",\"breast-quad\",\"irradiat\"]\n",
    "data = pd.read_csv(\"./data/breast-cancer.data\",names=column_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e92bd951",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b2d56930",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Class</th>\n",
       "      <th>age</th>\n",
       "      <th>menopause</th>\n",
       "      <th>tumor-size</th>\n",
       "      <th>inv-nodes</th>\n",
       "      <th>node-caps</th>\n",
       "      <th>deg-malig</th>\n",
       "      <th>breast</th>\n",
       "      <th>breast-quad</th>\n",
       "      <th>irradiat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "      <td>30-39</td>\n",
       "      <td>premeno</td>\n",
       "      <td>30-34</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>3</td>\n",
       "      <td>left</td>\n",
       "      <td>left_low</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "      <td>40-49</td>\n",
       "      <td>premeno</td>\n",
       "      <td>20-24</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>right</td>\n",
       "      <td>right_up</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "      <td>40-49</td>\n",
       "      <td>premeno</td>\n",
       "      <td>20-24</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>left</td>\n",
       "      <td>left_low</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "      <td>60-69</td>\n",
       "      <td>ge40</td>\n",
       "      <td>15-19</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>right</td>\n",
       "      <td>left_up</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "      <td>40-49</td>\n",
       "      <td>premeno</td>\n",
       "      <td>0-4</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>right</td>\n",
       "      <td>right_low</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Class    age menopause tumor-size inv-nodes node-caps  \\\n",
       "0  no-recurrence-events  30-39   premeno      30-34       0-2        no   \n",
       "1  no-recurrence-events  40-49   premeno      20-24       0-2        no   \n",
       "2  no-recurrence-events  40-49   premeno      20-24       0-2        no   \n",
       "3  no-recurrence-events  60-69      ge40      15-19       0-2        no   \n",
       "4  no-recurrence-events  40-49   premeno        0-4       0-2        no   \n",
       "\n",
       "   deg-malig breast breast-quad irradiat  \n",
       "0          3   left    left_low       no  \n",
       "1          2  right    right_up       no  \n",
       "2          2   left    left_low       no  \n",
       "3          2  right     left_up       no  \n",
       "4          2  right   right_low       no  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5fcf8bda",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2数据预处理\n",
    "# 特征值和目标值划分\n",
    "x = data.iloc[:,1:]\n",
    "y = data.iloc[:,0:1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2e586b29",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>menopause</th>\n",
       "      <th>tumor-size</th>\n",
       "      <th>inv-nodes</th>\n",
       "      <th>node-caps</th>\n",
       "      <th>deg-malig</th>\n",
       "      <th>breast</th>\n",
       "      <th>breast-quad</th>\n",
       "      <th>irradiat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30-39</td>\n",
       "      <td>premeno</td>\n",
       "      <td>30-34</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>3</td>\n",
       "      <td>left</td>\n",
       "      <td>left_low</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>40-49</td>\n",
       "      <td>premeno</td>\n",
       "      <td>20-24</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>right</td>\n",
       "      <td>right_up</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40-49</td>\n",
       "      <td>premeno</td>\n",
       "      <td>20-24</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>left</td>\n",
       "      <td>left_low</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>60-69</td>\n",
       "      <td>ge40</td>\n",
       "      <td>15-19</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>right</td>\n",
       "      <td>left_up</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>40-49</td>\n",
       "      <td>premeno</td>\n",
       "      <td>0-4</td>\n",
       "      <td>0-2</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>right</td>\n",
       "      <td>right_low</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     age menopause tumor-size inv-nodes node-caps  deg-malig breast  \\\n",
       "0  30-39   premeno      30-34       0-2        no          3   left   \n",
       "1  40-49   premeno      20-24       0-2        no          2  right   \n",
       "2  40-49   premeno      20-24       0-2        no          2   left   \n",
       "3  60-69      ge40      15-19       0-2        no          2  right   \n",
       "4  40-49   premeno        0-4       0-2        no          2  right   \n",
       "\n",
       "  breast-quad irradiat  \n",
       "0    left_low       no  \n",
       "1    right_up       no  \n",
       "2    left_low       no  \n",
       "3     left_up       no  \n",
       "4   right_low       no  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "02aa3e1f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Class\n",
       "0  no-recurrence-events\n",
       "1  no-recurrence-events\n",
       "2  no-recurrence-events\n",
       "3  no-recurrence-events\n",
       "4  no-recurrence-events"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c1ed228c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 训练集和测试集划分\n",
    "from sklearn.model_selection import train_test_split\n",
    "x_train,x_test,y_train,y_test = train_test_split(x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "df8f7bd0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3特征工程 \n",
    "# 特征提取\n",
    "x_train = x_train.to_dict(orient=\"records\")\n",
    "x_test = x_test.to_dict(orient=\"records\")\n",
    "# y_train = y_train.to_dict(orient=\"records\")\n",
    "# y_test = y_test.to_dict(orient=\"records\")\n",
    "from sklearn.feature_extraction import DictVectorizer\n",
    "transfer = DictVectorizer(sparse=False)\n",
    "x_train = transfer.fit_transform(x_train)\n",
    "x_test = transfer.transform(x_test)\n",
    "# y_train = transfer.fit_transform(y_train)\n",
    "# y_test = transfer.transform(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e4fd5296",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                    Class\n",
      "110  no-recurrence-events\n",
      "203     recurrence-events\n",
      "282     recurrence-events\n",
      "236     recurrence-events\n",
      "220     recurrence-events\n",
      "..                    ...\n",
      "22   no-recurrence-events\n",
      "173  no-recurrence-events\n",
      "125  no-recurrence-events\n",
      "2    no-recurrence-events\n",
      "253     recurrence-events\n",
      "\n",
      "[214 rows x 1 columns]\n"
     ]
    }
   ],
   "source": [
    "print(y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "689bfab4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\D_APP\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression()"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4 逻辑回归算法训练-模型\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "estimator = LogisticRegression()\n",
    "estimator.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a8e01114",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率:\n",
      " 0.6666666666666666\n"
     ]
    }
   ],
   "source": [
    "# 5模型评估\n",
    "y_predict = estimator.predict(x_test)\n",
    "# print(\"预测值和真实值对比:\\n\",y_predict == y_test)\n",
    "print(\"准确率:\\n\",estimator.score(x_test,y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9c09c08b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'recurrence-events', 'no-recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events',\n",
       "       'no-recurrence-events', 'recurrence-events',\n",
       "       'no-recurrence-events', 'no-recurrence-events'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e39f53b0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>recurrence-events</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>no-recurrence-events</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    Class\n",
       "116  no-recurrence-events\n",
       "48   no-recurrence-events\n",
       "73   no-recurrence-events\n",
       "266     recurrence-events\n",
       "118  no-recurrence-events"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "954a4090",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 模型评估 精确率、召回率、f1-score\n",
    "from sklearn.metrics import classification_report\n",
    "report = classification_report(y_test,y_predict,labels=['recurrence-events','no-recurrence-events'],target_names=['recurrence-events','no-recurrence-events'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "faa473c2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                      precision    recall  f1-score   support\n",
      "\n",
      "   recurrence-events       0.60      0.23      0.33        26\n",
      "no-recurrence-events       0.68      0.91      0.78        46\n",
      "\n",
      "            accuracy                           0.67        72\n",
      "           macro avg       0.64      0.57      0.56        72\n",
      "        weighted avg       0.65      0.67      0.62        72\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(report)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "9f121668",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 模型评估 auc计算\n",
    "from sklearn.metrics import roc_auc_score\n",
    "import numpy as np\n",
    "y_true = np.where(y_test == 'recurrence-events', 1, 0)\n",
    "y_score = np.where(y_predict == 'recurrence-events', 1, 0)\n",
    "auc_score = roc_auc_score(y_true,y_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "faf9ad68",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5719063545150502"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "auc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "071798dd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [0]\n",
      " [1]\n",
      " [1]\n",
      " [0]\n",
      " [0]\n",
      " [0]]\n"
     ]
    }
   ],
   "source": [
    "print(y_true)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "9f8315ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_score = np.where(y_predict == 'recurrence-events', 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6f9bdfee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n",
       "       0, 1, 1, 0, 0, 0])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "f3c1c6c6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'recurrence-events'\n",
      " 'no-recurrence-events' 'recurrence-events' 'recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'\n",
      " 'no-recurrence-events' 'recurrence-events' 'recurrence-events'\n",
      " 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events']\n"
     ]
    }
   ],
   "source": [
    "print(y_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "46bfaed8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                    Class\n",
      "271     recurrence-events\n",
      "101  no-recurrence-events\n",
      "63   no-recurrence-events\n",
      "60   no-recurrence-events\n",
      "248     recurrence-events\n",
      "..                    ...\n",
      "170  no-recurrence-events\n",
      "154  no-recurrence-events\n",
      "30   no-recurrence-events\n",
      "81   no-recurrence-events\n",
      "46   no-recurrence-events\n",
      "\n",
      "[72 rows x 1 columns]\n"
     ]
    }
   ],
   "source": [
    "print(y_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
